*APCED - Data preparation for NH cohort analysis
*Hyesung “Hace” Oh
*Started 11/07/2021
*Last worked on: 03/14/2023
*The purpose of this .do file is to prepare the analytical data file for the NH cohort analysis

********************************************************************************

*GENERAL*
* Start from a clean session: no data, programs, or macros left in memory.
clear all
macro drop _all
set more off
set scrollbufsize 2000000 
set linesize 200

*DATE*
* Build a YYYY_MM_DD stamp from today's date; it is used in the log file name.
global currdate: display %td_CCYY_NN_DD date(c(current_date), "DMY")
global date = subinstr(trim("$currdate"), " ", "_", .)

*LOG*
* All relative paths below are resolved from this working directory.
cd "P:\apced\shared\Aim2\source"
capture log close 
log using ..\output\dataprep_nh_cohort_$date.log, text replace

*DEPENDENCIES*
* statastates is called in nh_outcomes_merge and missings in impute_fac_vars.
* Install from SSC only when the command is not already on the adopath, so
* the do-file still runs when SSC cannot be reached but the packages exist.
foreach pkg in statastates missings {
	capture which `pkg'
	if _rc {
		ssc install `pkg', replace
	}
}


*MAIN*
* main: entry point. Runs each preparation step in sequence and then closes
* the log that was opened at the top of this do-file.
capture program drop main
program main

	* Individual-level NH cohort file
	clean_nh_cohort
	merge_outcomes
	zip_code_chars

	* Records collapsed by nh_group and year, then facility-level merges
	nh_data_collapse
	* External do-file; its contents are not visible from this file
	do ..\source\zipbynh_hace.do
	ratings_merge
	impute_fac_vars

	* Final merges back onto the individual-level file
	nh_outcomes_merge
	cnty_mkt_vars

	log close

end
**********

* clean_nh_cohort: loads the full cohort file, removes records whose
* apc_involve is system missing, runs data_clean, keeps cohort 2, and
* writes the result to adrd_ffs_cohort_nh.dta (left loaded in memory).
capture program drop clean_nh_cohort
program clean_nh_cohort

	use "P:\apced\shared\Aim2\source\adrd_ffs_cohort_all.dta", clear

	* Only system missing (.) is excluded; extended missing values are kept
	drop if apc_involve == .

	* Indicator variables and nh_group are built before the cohort filter
	data_clean

	* NOTE(review): the log messages in data_clean label cohort 1 as the NH
	* cohort, but cohort 2 is the one kept here - confirm the coding.
	drop if cohort != 2

	save "P:\apced\shared\Aim2\source\adrd_ffs_cohort_nh.dta", replace
	use "P:\apced\shared\Aim2\source\adrd_ffs_cohort_nh.dta", clear

end
**********

* data_clean: operates on the dataset in memory.
*   - logs how many records have cohort 0 and cohort 1
*   - builds 0/1 indicators from race, res_rurality and fac_rurality
*   - creates nh_group, a numeric id for each distinct hee_provn
* The indicator names assume the tabulate categories come out in the order
* listed below - TODO confirm against the value labels of each variable.
capture program drop data_clean
program data_clean

	* Cohort sizes, written to the log
	summarize cohort if cohort == 0
	local n_0 = r(N)
	display ""
	display "There are `n_0' individuals who are in the Community Cohort"
	display ""

	summarize cohort if cohort == 1
	local n_1 = r(N)
	display ""
	display "There are `n_1' individuals who are in the NH cohort"
	display ""

	* Race indicators
	local race_names white black hispanic other_race
	capture drop race_*
	tabulate race, generate(race_)
	foreach nm of local race_names {
		capture drop `nm'
	}
	local k = 0
	foreach nm of local race_names {
		local ++k
		rename race_`k' `nm'
	}

	* Resident rurality indicators
	local res_names not_metro_micro metro micro
	tabulate res_rurality, generate(res_)
	foreach nm of local res_names {
		capture drop `nm'
	}
	local k = 0
	foreach nm of local res_names {
		local ++k
		rename res_`k' `nm'
	}

	* Facility rurality indicators
	local fac_names fac_not_metro_micro fac_metro fac_micro
	tabulate fac_rurality, generate(fac_res_)
	foreach nm of local fac_names {
		capture drop `nm'
	}
	local k = 0
	foreach nm of local fac_names {
		local ++k
		rename fac_res_`k' `nm'
	}

	* Numeric group id per hee_provn value
	egen nh_group = group(hee_provn)

end
**********

* merge_outcomes: merges the NH cohort file 1:1 on bene_id_18900 with the
* cohort outcomes file, keeps only records found in both, and saves the
* result as merge_nh_outcomes.dta. The merge result is kept in
* outcome_merge_ind.
capture program drop merge_outcomes
program define merge_outcomes

	* clear is the documented option of use for discarding data in memory
	use "P:\apced\shared\Aim2\source\adrd_ffs_cohort_nh.dta", clear
	capture drop _merge
	merge 1:1 bene_id_18900 using "P:\apced\shared\Data retrieval\decd_cohort_outcomes_new.dta"
	tab _merge

	* Keep a permanent copy of the merge status before dropping _merge
	capture drop outcome_merge_ind
	gen outcome_merge_ind = _merge
	drop _merge

	* Matched records only
	keep if outcome_merge_ind == 3
	save ..\source\merge_nh_outcomes.dta, replace

end
**********

* zip_code_chars: attaches zip-code-level characteristics to the
* individual-level NH outcomes file.
*   1. zipcode_sdi.dta is merged m:1 on fac_zip_code
*   2. zipyear_share_long.dta (years 2016 onward) is merged m:1 on
*      fac_zip_code and year
* Only records matched in both merges are kept. The result is saved as
* merge_nh_outcomes_zip.dta and the input merge_nh_outcomes.dta is erased.
capture program drop zip_code_chars
program define zip_code_chars

	* Zip-level SDI file, keyed on fac_zip_code (copied from hkzip)
	use "P:\apced\shared\Data retrieval\zipcode_sdi.dta", clear
	capture drop fac_zip_code
	gen fac_zip_code = hkzip
	tempfile zippy_sdi
	save `zippy_sdi'
	
	* Zip-year share file, same key, restricted to 2016 and later
	use "P:\apced\shared\Data retrieval\zipyear_share_long.dta", clear
	capture drop fac_zip_code
	gen fac_zip_code = hkzip
	keep if year >= 2016
	tempfile zippy_share
	save `zippy_share'
		
	* Individual-level file; the merge key is copied from PROV2905
	use "..\source\merge_nh_outcomes.dta", clear 
	capture drop fac_zip_code
	gen fac_zip_code = PROV2905
		
	* Merge 1: SDI
	capture drop _merge
	merge m:1 fac_zip_code using `zippy_sdi'
	capture drop zip_sdi_nh_merge
	gen zip_sdi_nh_merge = _merge
	drop _merge
	forvalues i = 1/3 {
	    count if zip_sdi_nh_merge == `i'
		local merge_`i' = `r(N)'
	}
	
	display "The number of records from the NH cohort file that did not find a match with the zip code SDI file is " `merge_1' " records."
	display ""
	display "The number of records from the zip code SDI file that did not find a match with the NH cohort file is " `merge_2' " records."
	display ""
	display "The number of records that found a match between the NH cohort file and the zip code SDI was " `merge_3' " records."
	display ""
	
	* Merge 2: zip-year shares (the merge_1 to merge_3 locals are reused)
	capture drop _merge
	merge m:1 fac_zip_code year using `zippy_share'
	capture drop zip_share_nh_merge
	gen zip_share_nh_merge = _merge
	drop _merge
	forvalues i = 1/3 {
		count if zip_share_nh_merge == `i'
		local merge_`i' = `r(N)'
	}
	
	display "The number of records from the NH cohort file that did not find a match with the zip code share file is " `merge_1' " records."
	display ""
	display "The number of records from the zip code share file that did not find a match with the NH cohort file is " `merge_2' " records."
	display ""
	* Message corrected: this count refers to the share file, not the SDI file
	display "The number of records that found a match between the NH cohort file and the zip code share file was " `merge_3' " records."
	display ""
	
	* Final sample: matched in both merges
	quietly count if zip_share_nh_merge == 3 & zip_sdi_nh_merge == 3
	display "We are now working with a sample of " `r(N)' " individual-level records"
	keep if zip_share_nh_merge == 3 & zip_sdi_nh_merge == 3
	save ..\source\merge_nh_outcomes_zip.dta, replace
	capture erase ..\source\merge_nh_outcomes.dta
	
end
**********


* nh_data_collapse: collapses the individual-level file to one record per
* nh_group and year, computes the APC share of totals (fac_apc_share) and
* its tertile (fac_tert), and saves two files:
*   nh_level_apc_file.dta - the collapsed file
*   apc_share_file.dta    - the same file plus string copies prov0475 and
*                           prov1680, used by ratings_merge
capture program drop nh_data_collapse
program define nh_data_collapse

	use ..\source\merge_nh_outcomes_zip.dta, clear
	
	* Drop each variable separately: a single capture drop over several
	* names drops nothing when any one of them is absent, which would make
	* the gen statements below fail for the ones that do exist.
	foreach v in zip_hmo_share zip_dual_share zip_sdi {
		capture drop `v'
	}
	gen zip_hmo_share = hmo_share
	gen zip_dual_share = dual_share
	gen zip_sdi = sdi_score

	* first: identifiers; sum: counts; mean: everything else
	collapse ///
		(first) hee_provn PROV1680 PROV0475 fac_zip_code PROV3230 state county ///
		(sum) apc_total np_total pa_total all_total agg_adm ///
		(mean) occpct pcthmo totbeds paymcare payother multifac profit adm_bed ///
			fac_not_metro_micro fac_metro fac_micro avgage pctfem ///
			pctblack_mds3 pcthisp_mds3 pctwhite_mds3 pctunder65 ///
			pctlowcfs pcthighcfs pctbedft_mds3 pctwalking ///
			pctincont_bowel_mds3 pctcath_mds3 pctchf pcthyper ///
			pctschiz_bipol pctvent_mds3 pctuti pctfall30_mds3 pctobese ///
			pctrx_psych_mds3 pct_nonwhite dchrppd rn2nrs prov_unique ///
			zip_hmo_share zip_dual_share zip_sdi, ///
		by(nh_group year)
	
	* Share is missing when all_total is zero. Tertiles are computed over
	* all nh_group-year records together, not within year.
	capture drop fac_apc_share
	gen fac_apc_share = apc_total/all_total
	xtile fac_tert = fac_apc_share, nquantiles(3)
	
	* The data stay in memory after save, so no reload is needed
	save ..\source\nh_level_apc_file.dta, replace 

	*keep PROV0475 PROV1680 fac_apc_share fac_tert year
	* Lower-case str120 copies of the provider fields
	capture drop prov0475 
	capture drop prov1680
	gen str120 prov0475 = PROV0475
	gen str120 prov1680 = PROV1680
	
	save ..\source\apc_share_file.dta, replace //For merge with the individual-level outcomes file later

end



* ratings_merge: takes the hkmonth 4 records of the nhc-accptid-moyr file,
* merges them 1:m on hee_provn and year with apc_share_file.dta, keeps one
* record per accpt_id and year, and saves penult_nh_af.dta in the working
* directory. The merge result is stored in penult_merge.
capture program drop ratings_merge
program ratings_merge

	use "P:\apced\shared\common data files\nhc-accptid-moyr.dta", clear

	* One month per year
	keep if hkmonth == 4

	* Merge keys named to match apc_share_file.dta
	capture drop year
	generate year = hkyear
	capture drop _merge
	capture drop hee_provn
	generate hee_provn = accpt_id

	merge 1:m hee_provn year using "..\source\apc_share_file.dta", generate(penult_merge)

	* Keep the first record of each accpt_id-year pair
	capture drop penult_tag
	egen penult_tag = tag(accpt_id year)
	keep if penult_tag == 1

	save penult_nh_af.dta, replace

end


* impute_fac_vars: builds the facility-year file that nh_outcomes_merge
* merges onto the individual-level data.
*   1. Loads annosc1992_2018.dta and keeps one record per prov1680-year and
*      per accpt_id-year.
*   2. Merges 1:1 on accpt_id and year with penult_nh_af.dta.
*   3. For each variable in impute_vars, creates <var>_prev_impute: the
*      variable itself, or when it is missing, the nearest non-missing
*      value from up to max_lag earlier records of the same accpt_id.
*   4. Saves nh_tomerge_file.dta with hee_provn set to accpt_id.
* Requires the user-written missings command.
capture program drop impute_fac_vars
program define impute_fac_vars

	use "P:\apced\shared\Data retrieval\annosc1992_2018.dta", clear
	capture drop prov1680
		gen prov1680 = PROV1680
	*keep if year >= 2010
	capture drop fac_year_tag 
		egen fac_year_tag = tag(prov1680 year)
	keep if fac_year_tag == 1
	capture drop PROV0475
		gen PROV0475 = prov0475
		
	capture drop _merge
	
	* merge 1:1 needs accpt_id-year to be unique in the master data
	capture drop final_tag
	egen final_tag = tag(accpt_id year)
	keep if final_tag == 1
	merge 1:1 accpt_id year using penult_nh_af.dta, gen(long_merge)
	
		
	missings report agg_adm facpoor totbeds paymcaid payother multifac profit hospbase rnhrppd lpnhrppd cnahrppd zip_hmo_share overall_rating
	
	* Variables that require imputation, and how many records to look back.
	* Raise max_lag to look further back.
	local impute_vars agg_adm hospbase rnhrppd lpnhrppd cnahrppd overall_rating
	local max_lag 5
	
	foreach var of local impute_vars {
	    
		capture drop `var'_prev_impute
		gen `var'_prev_impute = `var'
		
		* Work within accpt_id so a value is never borrowed from another
		* facility, and fill only records that are still missing so the
		* nearest earlier value wins over older ones.
		forvalues lag = 1/`max_lag' {
			bysort accpt_id (year): replace `var'_prev_impute = `var'[_n-`lag'] ///
				if missing(`var'_prev_impute) & !missing(`var'[_n-`lag'])
		}
		
		* Records still missing after imputation
		count if missing(`var'_prev_impute)
		
	}
	
	sort accpt_id year
	missings report *_impute if inrange(year,2016,2018)
	
	* Key used by nh_outcomes_merge
	capture drop hee_provn
		gen hee_provn = accpt_id
	
	save ..\source\nh_tomerge_file.dta, replace
	
	*hospbase_prev_impute rnhrppd_prev_impute lpnhrppd_prev_impute cnahrppd_prev_impute overall_rating_prev_impute

end

* nh_outcomes_merge: merges the individual-level file m:1 on hee_provn and
* year with nh_tomerge_file.dta, keeps matched records, logs how many
* imputed values are still missing, rebuilds state as a two-digit string
* from state_fips, concatenates state and county into res_cnty, and saves
* semifinal_nh_file.dta.
capture program drop nh_outcomes_merge
program nh_outcomes_merge

	use ..\source\merge_nh_outcomes_zip.dta, clear
	capture drop semifinal_merge
	merge m:1 hee_provn year using nh_tomerge_file.dta, generate(semifinal_merge)
	drop if semifinal_merge != 3

	* Remaining missingness in each imputed variable
	local imputed agg_adm hospbase rnhrppd lpnhrppd cnahrppd overall_rating
	foreach fv of local imputed {
		count if missing(`fv'_prev_impute)
	}

	* statastates is a user-written command; it is expected to leave _merge
	* and state_fips behind, both of which are used just below
	statastates, abbreviation(state)
	drop _merge state
	tostring state_fips, generate(state) format("%02.0f")

	* County key: state followed by county
	capture drop res_cnty
	egen res_cnty = concat(state county)

	save ..\source\semifinal_nh_file.dta, replace

end


****
* cnty_mkt_vars: adds snf_bed and hha from ahrf_long.dta to the semifinal
* file by res_cnty and year, drops county-years that appear only in the
* AHRF file, and saves final_nh_af.dta. The _merge variable is left in the
* saved file.
capture program drop cnty_mkt_vars //CHANGE THIS UP!
program define cnty_mkt_vars

	* AHRF file keyed on res_cnty (copied from fips) with a numeric year
	tempfile ahrf_temp
	use "P:\apced\shared\Data retrieval\ahrf_long.dta", clear
	capture drop res_cnty 
		gen res_cnty = fips
	destring year, replace
	save `ahrf_temp'

	* clear is stated explicitly so the load does not depend on the data
	* in memory being flagged as unchanged
	use ..\source\semifinal_nh_file.dta, clear
	merge m:1 res_cnty year using `ahrf_temp', keepusing(snf_bed hha)
	drop if _merge == 2
	save ..\source\final_nh_af.dta, replace


end
****



* Run the whole pipeline; every program it calls is defined above.
main
	








